import requests
from lxml import etree

# Scrape new-home listings from Fang.com (房天下) and print them as a list of
# dicts with the keys 'name', 'addr', 'price' and 'houseArea'.
url = 'https://newhouse.fang.com/house/s/b95?ctm=1.bj.xf_search.page.6'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36',
}
# Without a timeout, requests waits forever on a stalled connection.
resp = requests.get(url, headers=headers, timeout=10)
# Fail loudly on an HTTP error instead of parsing an error page as data.
resp.raise_for_status()
resp.encoding = 'utf-8'
e = etree.HTML(resp.text)

names = [n.strip() for n in e.xpath('//div[@class="nlcd_name"]/a/text()')]
addreses = e.xpath('//div[@class="address"]/a/@title')
prices = [d.xpath('string(.)').strip() for d in e.xpath('//div[@class="nhouse_price"]')]
areaDoms = e.xpath('//div[@class="house_type clearfix"]')
data_list = []
# NOTE: the four lists are paired purely by position and zip() stops at the
# shortest one, so a listing that lacks any of these nodes will shift or drop
# the entries that follow it.
for name, addr, price, areaDom in zip(names, addreses, prices, areaDoms):
    # Query the element directly with a relative XPath rather than serialising
    # and re-parsing it; keep only meaningful text fragments by dropping the
    # '/' separators and the empty strings left by whitespace-only nodes.
    houseArea = [
        text
        for text in (node.strip() for node in areaDom.xpath('.//text()'))
        if text and text != '/'
    ]
    data_list.append({
        'name': name,
        'addr': addr,
        'price': price,
        'houseArea': houseArea,
    })
print(data_list)
